library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("C:/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

################################################################################

# FIGURE 4: OBAMA ENGAGEMENT LOGIT

# READING IN DATA FILE TO OBTAIN VARIABLE VALUES FOR PREDICTIONS
obama_logs_analysis <- fread("obama_logs_analysis.csv", header = TRUE, 
                               stringsAsFactors = FALSE)

load("tableSI6_col2.RData")

# CREATING DATA FRAME FOR PREDICTIONS.  EACH ROW IS ONE OF THE INTEREST-TIME
# PERIOD OBSERVATIONS IN THE ORIGINAL ANALYSIS.  RESOURCES/PARTISAN ALIGNMENT
# ARE SET TO VALUES OF INTEREST AS INDICATED IN FIGURE 4, ALL OTHER VARIABLES
# ARE SET TO THEIR OBSERVED VALUES.

newdata <- data.frame(lobbyexp_1l=c(rep(quantile(obama_logs_analysis$lobbyexp_1l, 
                                                 probs = c(0.25), na.rm = TRUE), 
                                        dim(obama_logs_analysis)[1]),
                                    rep(quantile(obama_logs_analysis$lobbyexp_1l, 
                                                 probs = c(0.50), na.rm = TRUE), 
                                        dim(obama_logs_analysis)[1]),
                                    rep(quantile(obama_logs_analysis$lobbyexp_1l, 
                                                 probs = c(0.75), na.rm = TRUE), 
                                        dim(obama_logs_analysis)[1]),
                                    rep(obama_logs_analysis$lobbyexp_1l, 7)),
                      inhouse_lobby=obama_logs_analysis$inhouse_lobby,
                      any_visits_leq0_1l=obama_logs_analysis$any_visits_leq0_1l, 
                      industry_pid=c(rep(obama_logs_analysis$industry_pid, 7), 
                                     rep("I", dim(obama_logs_analysis)[1]), 
                                     rep("D", dim(obama_logs_analysis)[1]), 
                                     rep("R", dim(obama_logs_analysis)[1])),
                      made_donations_l1tol8=c(rep(obama_logs_analysis$made_donations_l1tol8,3), 
                                              rep(0,dim(obama_logs_analysis)[1]), 
                                              rep(1,dim(obama_logs_analysis)[1]*3),
                                              rep(obama_logs_analysis$made_donations_l1tol8,3)), 
                      amt_donations_1ltol8=c(rep(obama_logs_analysis$amt_donations_1ltol8,3), 
                                             rep(0, dim(obama_logs_analysis)[1]),
                                             rep(quantile(obama_logs_analysis$amt_donations_1ltol8[which(obama_logs_analysis$made_donations_l1tol8==1)], 
                                                          probs = c(0.25), 
                                                          na.rm = TRUE), dim(obama_logs_analysis)[1]),
                                             rep(quantile(obama_logs_analysis$amt_donations_1ltol8[which(obama_logs_analysis$made_donations_l1tol8==1)], 
                                                          probs = c(0.50), 
                                                          na.rm = TRUE), dim(obama_logs_analysis)[1]),
                                             rep(quantile(obama_logs_analysis$amt_donations_1ltol8[which(obama_logs_analysis$made_donations_l1tol8==1)], 
                                                          probs = c(0.75), 
                                                          na.rm = TRUE), dim(obama_logs_analysis)[1]),
                                             rep(obama_logs_analysis$amt_donations_1ltol8,3)),
                      ACC=obama_logs_analysis$ACC,
                      ADV=obama_logs_analysis$ADV,
                      AER=obama_logs_analysis$AER,
                      AGR=obama_logs_analysis$AGR,
                      ALC=obama_logs_analysis$ALC,
                      ANI=obama_logs_analysis$ANI,
                      APP=obama_logs_analysis$APP,
                      ART=obama_logs_analysis$ART,
                      AUT=obama_logs_analysis$AUT,
                      AVI=obama_logs_analysis$AVI,
                      BAN=obama_logs_analysis$BAN,
                      BEV=obama_logs_analysis$BEV,
                      BNK=obama_logs_analysis$BNK,
                      BUD=obama_logs_analysis$BUD,
                      CAW=obama_logs_analysis$CAW,
                      CDT=obama_logs_analysis$CDT,
                      CHM=obama_logs_analysis$CHM,
                      CIV=obama_logs_analysis$CIV,
                      COM=obama_logs_analysis$COM,
                      CON=obama_logs_analysis$CON,
                      CPI=obama_logs_analysis$CPI,
                      CPT=obama_logs_analysis$CPT,
                      CSP=obama_logs_analysis$CSP,
                      DEF=obama_logs_analysis$DEF,
                      DIS=obama_logs_analysis$DIS,
                      DOC=obama_logs_analysis$DOC,
                      ECN=obama_logs_analysis$ECN,
                      EDU=obama_logs_analysis$EDU,
                      ENG=obama_logs_analysis$ENG,
                      ENV=obama_logs_analysis$ENV,
                      FAM=obama_logs_analysis$FAM,
                      FIN=obama_logs_analysis$FIN,
                      FIR=obama_logs_analysis$FIR,
                      FOO=obama_logs_analysis$FOO,
                      FOR=obama_logs_analysis$FOR,
                      FUE=obama_logs_analysis$FUE,
                      GAM=obama_logs_analysis$GAM,
                      GOV=obama_logs_analysis$GOV,
                      HCR=obama_logs_analysis$HCR,
                      HOM=obama_logs_analysis$HOM,
                      HOU=obama_logs_analysis$HOU,
                      IMM=obama_logs_analysis$IMM,
                      IND=obama_logs_analysis$IND,
                      INS=obama_logs_analysis$INS,
                      INT=obama_logs_analysis$INT,
                      LAW=obama_logs_analysis$LAW,
                      LBR=obama_logs_analysis$LBR,
                      MAN=obama_logs_analysis$MAN,
                      MAR=obama_logs_analysis$MAR,
                      MED=obama_logs_analysis$MED,
                      MIA=obama_logs_analysis$MIA,
                      MIN=obama_logs_analysis$MIN,
                      MMM=obama_logs_analysis$MMM,
                      MON=obama_logs_analysis$MON,
                      NAT=obama_logs_analysis$NAT,
                      PHA=obama_logs_analysis$PHA,
                      POS=obama_logs_analysis$POS,
                      REL=obama_logs_analysis$REL,
                      RES=obama_logs_analysis$RES,
                      RET=obama_logs_analysis$RET,
                      ROD=obama_logs_analysis$ROD,
                      RRR=obama_logs_analysis$RRR,
                      SCI=obama_logs_analysis$SCI,
                      SMB=obama_logs_analysis$SMB,
                      SPO=obama_logs_analysis$SPO,
                      TAR=obama_logs_analysis$TAR,
                      TAX=obama_logs_analysis$TAX,
                      TEC=obama_logs_analysis$TEC,
                      TOB=obama_logs_analysis$TOB,
                      TOR=obama_logs_analysis$TOR,
                      TOU=obama_logs_analysis$TOU,
                      TRA=obama_logs_analysis$TRA,
                      TRD=obama_logs_analysis$TRD,
                      TRF=obama_logs_analysis$TRF,
                      TRU=obama_logs_analysis$TRU,
                      UNM=obama_logs_analysis$UNM,
                      URB=obama_logs_analysis$URB,
                      UTI=obama_logs_analysis$UTI,
                      VET=obama_logs_analysis$VET,
                      WAS=obama_logs_analysis$WAS,
                      WEL=obama_logs_analysis$WEL)

# CALCULATING PREDICTED VALUES THAT CORRESPOND WITH EACH RESOURCES/PARTISAN
# ALIGNMENT SETTING. PLEASE NOTE THAT THIS CODE DIFFERS FROM THAT IN fig3.R,
# WHICH CALCULATES ALL THE FITTED VALUES AND THEN EXTRACTS THOSE CORRESPONDING
# WITH EACH RESOURCES/PARTISAN ALIGNMENT SETTING; THE DIFFERENCE IN CODING IS A 
# RESULT OF MEMORY LIMITS IN R (BOTH METHODS, IF POSSIBLE TO CONDUCT, WOULD
# PRODUCE THE SAME END RESULTS).

# AS NOTED IN SI.37, I DO NOT UTILIZE VARYING INTERCEPTS IN THIS CALCULATION 
# (I.E., re_formula = NA); INCLUDING THESE VARYING INTERCEPTS DOES NOT AFFECT 
# THE SUBSTANTIVE CONCLUSIONS DRAWN FROM THE PREDICTED VALUES

predicted_values_25exp <- as.data.frame(fitted(tableSI6_col2, newdata[1:306766,], summary = FALSE, re_formula = NA))
predicted_values_25exp_mean <- rowMeans(predicted_values_25exp)
rm(predicted_values_25exp)

predicted_values_50exp <- as.data.frame(fitted(tableSI6_col2, newdata[306767:613532,], summary = FALSE, re_formula = NA))
predicted_values_50exp_mean <- rowMeans(predicted_values_50exp)
rm(predicted_values_50exp)

predicted_values_75exp <- as.data.frame(fitted(tableSI6_col2, newdata[613533:920298,], summary = FALSE, re_formula = NA))
predicted_values_75exp_mean <- rowMeans(predicted_values_75exp)
rm(predicted_values_75exp)

predicted_values_0contrib <- as.data.frame(fitted(tableSI6_col2, newdata[920299:1227064,], summary = FALSE, re_formula = NA))
predicted_values_0contrib_mean <- rowMeans(predicted_values_0contrib)
rm(predicted_values_0contrib)

predicted_values_25contrib <- as.data.frame(fitted(tableSI6_col2, newdata[1227065:1533830,], summary = FALSE, re_formula = NA))
predicted_values_25contrib_mean <- rowMeans(predicted_values_25contrib)
rm(predicted_values_25contrib)

predicted_values_50contrib <- as.data.frame(fitted(tableSI6_col2, newdata[1533831:1840596,], summary = FALSE, re_formula = NA))
predicted_values_50contrib_mean <- rowMeans(predicted_values_50contrib)
rm(predicted_values_50contrib)

predicted_values_75contrib <- as.data.frame(fitted(tableSI6_col2, newdata[1840597:2147362,], summary = FALSE, re_formula = NA))
predicted_values_75contrib_mean <- rowMeans(predicted_values_75contrib)
rm(predicted_values_75contrib)

predicted_values_I <- as.data.frame(fitted(tableSI6_col2, newdata[2147363:2454128,], summary = FALSE, re_formula = NA))
predicted_values_I_mean <- rowMeans(predicted_values_I)
rm(predicted_values_I)

predicted_values_D <- as.data.frame(fitted(tableSI6_col2, newdata[2454129:2760894,], summary = FALSE, re_formula = NA))
predicted_values_D_mean <- rowMeans(predicted_values_D)
rm(predicted_values_D)

predicted_values_R <- as.data.frame(fitted(tableSI6_col2, newdata[2760895:3067660,], summary = FALSE, re_formula = NA))
predicted_values_R_mean <- rowMeans(predicted_values_R)
rm(predicted_values_R)

predicted_dists <- cbind(predicted_values_25exp_mean, predicted_values_50exp_mean, 
                         predicted_values_75exp_mean, predicted_values_0contrib_mean, 
                         predicted_values_25contrib_mean, predicted_values_50contrib_mean,
                         predicted_values_75contrib_mean, predicted_values_I_mean,
                         predicted_values_D_mean, predicted_values_R_mean)

pes <- apply(predicted_dists, MARGIN = 2, FUN = function(x) mean(x))
ci_25 <-apply(predicted_dists, MARGIN = 2, FUN = function(x) quantile(x, probs = c(0.025)))
ci_975 <-apply(predicted_dists, MARGIN = 2, FUN = function(x) quantile(x, probs = c(0.975)))

# TO DETERMINE WHETHER THESE DISTRIBUTIONS OF PREDICTED VALUES ARE
# DISTINGUISHABLE FROM OTHER ANOTHER, WE ALSO NEED TO CALCULATE THE DIFFERENCES
# IN THE DISTRIBUTIONS WE WANT TO COMPARE AND OBTAIN THE MEAN AND 0.025/0.975 
# QUANTILE VALUES.  THE FOLLOWING CODE DOES THIS FOR EACH OF THE COMPARISONS
# MADE IN THE RIGHT PANE OF FIGURE 4.

diff_quants <- data.frame("pe"=numeric(0), "ci_25"=numeric(0), "ci_975"=numeric(0))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN EXPENDITURES ARE SET TO THE FIRST
# QUARTILE VALUE VS. THE SECOND QUARTILE VALUE
predicted_diff_1st_to_2nd_exp <- predicted_dists[,2] - predicted_dists[,1]
mean(predicted_diff_1st_to_2nd_exp)
quantile(predicted_diff_1st_to_2nd_exp, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_1st_to_2nd_exp), 
                                             "ci_25"=quantile(predicted_diff_1st_to_2nd_exp, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_1st_to_2nd_exp, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN EXPENDITURES ARE SET TO THE SECOND
# QUARTILE VALUE VS. THE THIRD QUARTILE VALUE
predicted_diff_2nd_to_3rd_exp <- predicted_dists[,3] - predicted_dists[,2]
mean(predicted_diff_2nd_to_3rd_exp)
quantile(predicted_diff_2nd_to_3rd_exp, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_2nd_to_3rd_exp), 
                                             "ci_25"=quantile(predicted_diff_2nd_to_3rd_exp, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_2nd_to_3rd_exp, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO NONE VS.
# THE FIRST QUARTILE VALUE
predicted_diff_0_to_1st_contrib <- predicted_dists[,5] - predicted_dists[,4]
mean(predicted_diff_0_to_1st_contrib)
quantile(predicted_diff_0_to_1st_contrib, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_0_to_1st_contrib), 
                                             "ci_25"=quantile(predicted_diff_0_to_1st_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_0_to_1st_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO THE FIRST 
# QUARTILE VALUE VS. THE SECOND QUARTILE VALUE
predicted_diff_1st_to_2nd_contrib <- predicted_dists[,6] - predicted_dists[,5]
mean(predicted_diff_1st_to_2nd_contrib)
quantile(predicted_diff_1st_to_2nd_contrib, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_1st_to_2nd_contrib), 
                                             "ci_25"=quantile(predicted_diff_1st_to_2nd_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_1st_to_2nd_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO THE SECOND 
# QUARTILE VALUE VS. THE THIRD QUARTILE VALUE
predicted_diff_2nd_to_3rd_contrib <- predicted_dists[,7] - predicted_dists[,6]
mean(predicted_diff_2nd_to_3rd_contrib)
quantile(predicted_diff_2nd_to_3rd_contrib, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_2nd_to_3rd_contrib), 
                                             "ci_25"=quantile(predicted_diff_2nd_to_3rd_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_2nd_to_3rd_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN PARTISAN ALIGNMENT IS SET TO 
# INDEPENDENT VS. DEMOCRATIC
predicted_diff_I_to_D <- predicted_dists[,9] - predicted_dists[,8]
mean(predicted_diff_I_to_D)
quantile(predicted_diff_I_to_D, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_I_to_D), 
                                             "ci_25"=quantile(predicted_diff_I_to_D, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_I_to_D, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN PARTISAN ALIGNMENT IS SET TO 
# REPUBLICAN VS. DEMOCRATIC
predicted_diff_R_to_D <- predicted_dists[,9] - predicted_dists[,10]
mean(predicted_diff_R_to_D)
quantile(predicted_diff_R_to_D, probs = c(0.025, 0.975))

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_R_to_D), 
                                             "ci_25"=quantile(predicted_diff_R_to_D, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_R_to_D, probs = c(0.975))))

# RECREATING FIGURE 4; THIS CODE CREATES THE LEFT AND RIGHT PANES OF THE FIGURE
# SEPARATELY

pdf(file = "fig4_left.pdf", family = "Times", height = 9, width = 7)
par(mar=c(5.1,18,2,.7))
plot(NULL, ylim=c(0.5, 13), xlim=c(0,0.50), axes=FALSE,  bg = "black",
     tck=-.02, cex.axis=0.9, cex=1.5,
     xlab="", ylab="", yaxt="n", xaxt="n", main="")
points(pes,
       c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1),
       pch=19, cex=1.5)
segments(x0=ci_25,
         x1=ci_975,
         y0=c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1), cex=1.25)
axis(1,at=c(0.00, 0.25, 0.50),
     labels=sprintf("%.2f",c(0.00, 0.25, 0.50)),cex.axis = 1.75, lwd=2)
axis(2,at=c(12, 11, 10, 8, 7, 6, 5, 3, 2, 1),
     las=2,
     labels=c("1st Quartile (<$5,000)", "2nd Quartile ($20,000)", 
              "3rd Quartile ($49,246)",
              "None", "1st Quartile ($28,891)", "2nd Quartile ($97,088)", 
              "3rd Quartile ($337,000)",
              "Democratic", "Independent", "Republican"),
     tck=0,
     lwd =0,
     line = 0,
     cex.axis=1.5)
axis(2,at=c(12.75,8.75,3.75),
     las=2,
     labels=c(expression(~bold(~underline("Lobbying Expenditures"))), 
              expression(~bold(~underline("Campaign Contributions"))), 
              expression(~bold(~underline("Partisan Alignment"))) 
     ),
     tck=0,
     lwd = 0,
     line = 0,
     cex.axis=1.75)
mtext("Pr(Engagement)", side=1, line = 3, cex =2.25)
text(pes,
     c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1)+0.3,
     sprintf("%.2f",round(pes, 2)), cex=1.5)
dev.off()

pdf(file = "fig4_right.pdf", family = "Times", height = 9, width = 7)
par(mar=c(5.1,7,2,.7))
plot(NULL, ylim=c(0.5, 13), xlim=c(-0.05,0.15), axes=FALSE,  bg = "black",
     tck=-.02, cex.axis=0.9, cex=1.5,
     xlab="", ylab="", yaxt="n", xaxt="n", main="")
abline(v=0, lty=2, col="gray")
points(diff_quants$pe,
       c(11, 10, 7, 6, 5, 2, 1),
       pch=19, cex=1.5)
segments(x0=diff_quants$ci_25,
         x1=diff_quants$ci_975,
         y0=c(11, 10, 7, 6, 5, 2, 1), cex=1.25)
axis(1,at=c(-0.05, 0.00, 0.05, 0.10, 0.15),
     labels=c(sprintf("%.2f",c(-0.05, 0.00, 0.05, 0.10, 0.15))),cex.axis = 1.75, 
     lwd=2)
axis(2,at=c(11, 10, 7, 6, 5, 2, 1),
     las=2,
     labels=c("Q2-Q1", "Q3-Q2",
              "Q1-None", "Q2-Q1", "Q3-Q2",
              "Dem.-Ind.", "Dem.-Rep."),
     tck=0,
     lwd =0,
     line = 0,
     cex.axis=1.5)
mtext("Difference in Pr(Engagement)", side=1, line = 3, cex =2.25)
text(diff_quants$pe,
     c(11, 10, 7, 6, 5, 2, 1)+0.3,
     sprintf("%.2f",round(diff_quants$pe, 2)), cex=1.5)
dev.off()